/* OPTIONS OBS=5000  NOREPLACE ;         */


/** This program computes differences in within-industry 75th/25th percentile of TFPR for each industry-year for:

	1.  2002 and 2007 nonimputed minus Bureau-completed.
	2.  2002 and 2007 CART-completed minus Bureau-completed.

  INPUT FILES:    

	allcmf.allcmf0207_tfpr_iqrs
	allcmf.gooddataalinds0207_tfpr_iqrs
	allcmf.imputes_0207_tfpr_iqr_means

  OUTPUT FILES:


**/

%include "ASMimplibs.sas";




/* Non-imputed sample: carry forward only the year/NAICS keys plus
   tfpr_iqr and tfpr7525 from the stored industry-year file. */
data nonimputes;
  set allcmf.gooddataalinds0207_tfpr_iqrs;
  keep year NAICS tfpr_iqr tfpr7525;
run;

/* Bureau-completed sample: carry forward only the year/NAICS keys plus
   tfpr_iqr and tfpr7525 from the stored industry-year file. */
data CBcompleted;
  set allcmf.allcmf0207_tfpr_iqrs;
  keep year NAICS tfpr_iqr tfpr7525;
run;

/* CART-completed sample: keep the year/NAICS keys plus the *_mean
   versions of the two TFPR measures.  The explicit RUN closes the step
   here instead of relying on the next PROC statement to end it. */
data CARTcompleted (keep = year NAICS tfpr_iqr_mean tfpr7525_mean);
 set allcmf.imputes_0207_tfpr_iqr_means;
run;

/* Tag each TFPR measure with its source (nonimp / CB / CARTmean) so the
   three WORK data sets can be merged side by side without the shared
   variable names overwriting one another. */
proc datasets library=work;
  modify nonimputes;
    rename tfpr7525 = tfpr7525_nonimp
           tfpr_iqr = tfpr_iqr_nonimp;
  modify CBcompleted;
    rename tfpr7525 = tfpr7525_CB
           tfpr_iqr = tfpr_iqr_CB;
  modify CARTcompleted;
    rename tfpr_iqr_mean = tfpr_iqr_CARTmean
           tfpr7525_mean = tfpr7525_CARTmean;
run;
/* PROC DATASETS is a RUN-group procedure: RUN only executes the pending
   statements, QUIT is what actually ends the procedure. */
quit;

/* Order all three inputs by the merge keys (year, NAICS). */
proc sort data=nonimputes;
  by year NAICS;
run;

proc sort data=CBcompleted;
  by year NAICS;
run;

proc sort data=CARTcompleted;
  by year NAICS;
run;


/* Match-merge the three sources on year and NAICS, retaining only the
   industry-years that appear in every one of them. */
data CBnonimpCART;
  merge nonimputes    (in = in_nonimp)
        CBcompleted   (in = in_CB)
        CARTcompleted (in = in_CART);
  by year NAICS;
  if not (in_nonimp and in_CB and in_CART) then delete;
run;

/* Derive, for every industry-year, the gap between the non-imputed and
   CART-completed tfpr7525 values and the Bureau-completed value:
     *_diff     level difference
     *_ratio    ratio to the Bureau-completed value
     *_logdiff  natural log of that ratio
   plus 0/1 indicators for logdiff > 0.10 (*_disp_gt_CB) and
   logdiff > 0 (*_diff_pos).

   Missing handling: in SAS a missing value compares as smaller than any
   number, so a plain "if logdiff > x then 1; else 0" would silently code
   an industry with an undefined log difference as 0 and count it in the
   denominator of the shares reported later.  The indicators are therefore
   left missing whenever the log difference is missing, so that the
   N / NMISS columns of the reports reflect them. */
data CBnonimpCART;
  set CBnonimpCART;

  nonimp_CB_tfpr7525_diff = tfpr7525_nonimp   - tfpr7525_CB;
  CART_CB_tfpr7525_diff   = tfpr7525_CARTmean - tfpr7525_CB;

  /* Skip the division when the denominator is exactly zero; the ratios
     then stay missing, as before, but without division-by-zero notes. */
  if tfpr7525_CB ne 0 then do;
    nonimp_CB_tfpr7525_ratio = tfpr7525_nonimp   / tfpr7525_CB;
    CART_CB_tfpr7525_ratio   = tfpr7525_CARTmean / tfpr7525_CB;
  end;

  /* log() is only defined for positive arguments; a missing or
     non-positive ratio leaves the log difference missing. */
  if nonimp_CB_tfpr7525_ratio > 0 then
    nonimp_CB_tfpr7525_logdiff = log(nonimp_CB_tfpr7525_ratio);
  if CART_CB_tfpr7525_ratio > 0 then
    CART_CB_tfpr7525_logdiff = log(CART_CB_tfpr7525_ratio);

  /* Dispersion more than 10 log points above the Bureau-completed data. */
  if missing(nonimp_CB_tfpr7525_logdiff) then nonimp_disp_gt_CB = .;
  else nonimp_disp_gt_CB = (nonimp_CB_tfpr7525_logdiff > 0.10);

  if missing(CART_CB_tfpr7525_logdiff) then CART_disp_gt_CB = .;
  else CART_disp_gt_CB = (CART_CB_tfpr7525_logdiff > 0.10);

  /* Dispersion above the Bureau-completed data by any amount. */
  if missing(nonimp_CB_tfpr7525_logdiff) then nonimp_CB_diff_pos = .;
  else nonimp_CB_diff_pos = (nonimp_CB_tfpr7525_logdiff > 0.0);

  if missing(CART_CB_tfpr7525_logdiff) then CART_CB_diff_pos = .;
  else CART_CB_diff_pos = (CART_CB_tfpr7525_logdiff > 0.0);
run;


/* The reports below use BY year, which needs the data ordered by year. */
proc sort data = CBnonimpCART;
  by year;
run;


/* Table 1: the mean of the 0/1 flag nonimp_disp_gt_CB is the share of
   industries flagged, reported separately for each year. */
proc means data = CBnonimpCART N NMISS mean;
  title1 "Table 1: Share of industries in which TFPR 75-25 ratio";
  title2 "in non-imputed data is > 10% higher than in";
  title3 "Bureau-completed data";
  by year;
  var nonimp_disp_gt_CB;
run;

/* Table 2: the mean of the 0/1 flag CART_disp_gt_CB is the share of
   industries flagged, reported separately for each year. */
proc means data = CBnonimpCART N NMISS mean;
  title1 "Table 2: Share of industries in which TFPR 75-25 ratio";
  title2 "in CART-completed data is > 10% higher than in";
  title3 "Bureau-completed data";
  by year;
  var CART_disp_gt_CB;
run;

/* Cross-tabulate the two "> 10 log points" flags by year to see how far
   the flagged sets of industries coincide.
   Title text corrected: "disperson" -> "dispersion", "they they" -> "that they". */
proc freq data=CBnonimpCART;
 tables nonimp_disp_gt_CB*CART_disp_gt_CB;
by year;
title1 "Cross-tab of the two 10-log-point industry";
title2 "dispersion difference subsets";
title3 "to show that they overlap";
run;


/* Mean of the 0/1 flag nonimp_CB_diff_pos = share of industries whose
   non-imputed log difference is positive, by year.
   The stray leading ": " in title1 (left over from a missing table label)
   is removed so the heading matches the companion CART report. */
proc means data=CBnonimpCART N NMISS mean;
  var nonimp_CB_diff_pos ;
by year;
title1 "Share of industries in which TFPR 75-25 ratio";
title2 "in non-imputed data is higher than in";
title3 "Bureau-completed data";
run;

/* Mean of the 0/1 flag CART_CB_diff_pos = share of industries whose
   CART-completed log difference is positive, by year. */
proc means data = CBnonimpCART N NMISS mean;
  title1 "Share of industries in which TFPR 75-25 ratio";
  title2 "in CART-completed data is higher than in";
  title3 "Bureau-completed data";
  by year;
  var CART_CB_diff_pos;
run;


/* Table 3: by-year distribution statistics of the two log differences.
   The level-difference variables (nonimp_CB_tfpr7525_diff,
   CART_CB_tfpr7525_diff) are not reported here. */
proc means data = CBnonimpCART median mean q1 q3 min p1 p10;
  title1 "Table 3: Distribution of same-industry log differences";
  title2 "NAIC6 industry IQRs of TFPR ";
  title3 "nonimputed data minus Bureau-completed and ";
  title4 "CART-completed (mean) minus Bureau-completed";
  by year;
  var nonimp_CB_tfpr7525_logdiff CART_CB_tfpr7525_logdiff;
run;

/* Blank out title lines 1-4 used by the reports above; TITLE settings
   stay in effect until changed, so this keeps them from carrying over
   to any output produced later in the session. */
title1 "";
title2 "";
title3 "";
title4 "";

